package org.hxzon.blogbackup.baidu;

import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.hxzon.util.DownloadUrl;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Fetches a blog page and turns it into a jsoup {@link Document}.
 *
 * <p>The page text is decoded here with an explicit charset instead of letting
 * jsoup fetch the URL itself. Per the original author's note, the pages are GBK
 * but get handled as GB2312, which makes traditional Chinese text unreadable.
 */
public class BlogReader {
    /** Charset name used to decode every downloaded page. */
    public static final String baiduEncoding = "GBK";

    /**
     * Downloads the content behind {@code url}, decodes it with
     * {@link #baiduEncoding}, and parses the resulting text as HTML.
     *
     * @param url address handed unchanged to {@code DownloadUrl.connect}
     * @return the document produced by {@code Jsoup.parse} for the decoded text
     * @throws RuntimeException wrapping the {@link IOException} raised while
     *         downloading or reading the content
     */
    public static Document get(String url) {
        final String markup;
        try {
            // Decode explicitly as GBK; see the class comment for the reason.
            markup = IOUtils.toString(DownloadUrl.connect(url), baiduEncoding);
        } catch (IOException cause) {
            throw new RuntimeException(cause);
        }
        return Jsoup.parse(markup);
    }
}
